Chapter 5 Community composition

load("data/data.Rdata")

5.1 Taxonomy overview

5.1.1 Stacked barplot

# Sample metadata for the transplant plots: remove every "_a" occurrence from
# the tube codes and build a plotting label from tube code and individual.
transplants_metadata <- sample_metadata %>%
  mutate(
    Tube_code = str_remove_all(Tube_code, "_a"),
    # paste0() joins the parts directly; paste() with its default sep = " "
    # would yield "code _ individual" with stray spaces around the underscore.
    newID = paste0(Tube_code, "_", individual)
  )

# Long table with one row per genome x sample, annotated with genome and
# sample metadata; zero-abundance rows are dropped.
merged_data <- genome_counts_filt %>%
  # TSS normalisation: divide every non-genome column by its own total
  mutate(across(-genome, ~ .x / sum(.x))) %>%
  # reduce to the minimum number of columns (genome, sample, count)
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  # append genome metadata
  left_join(genome_metadata, by = join_by(genome)) %>%
  # append sample metadata, matching on the cleaned tube codes
  left_join(transplants_metadata, by = join_by(sample == Tube_code)) %>%
  # filter 0 counts
  filter(count > 0)

# Stacked barplot of phylum-level relative abundance, one bar per newID.
ggplot(
  merged_data,
  # grouping by phylum keeps the taxonomic units stacked in the same order
  aes(x = newID, y = count, fill = phylum, group = phylum)
) +
  # stacked bars with thin white borders (geom_col uses the values as given)
  geom_col(colour = "white", linewidth = 0.1) +
  # nested facets: time_point on the outer level, type on the inner level
  facet_nested(. ~ time_point + type, scales = "free") +
  scale_fill_manual(values = phylum_colors) +
  labs(x = "Sample", y = "Relative abundance", fill = "Phylum") +
  guides(fill = guide_legend(ncol = 1)) +
  theme(
    # x tick labels are drawn at size 0
    axis.text.x = element_text(size = 0, angle = 45, hjust = 1)
  )

5.1.2 Phylum relative abundances

# Relative abundance of each phylum in each sample (one row per
# sample x phylum).
phylum_summary <- genome_counts_filt %>%
  # TSS normalisation: divide every non-genome column by its own total
  mutate(across(-genome, ~ .x / sum(.x))) %>%
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample == Tube_code)) %>%
  left_join(genome_metadata, by = join_by(genome)) %>%
  group_by(sample, phylum) %>%
  # .groups = "drop" returns an ungrouped table, so later steps do not
  # silently inherit the per-sample grouping
  summarise(relabun = sum(count), .groups = "drop")

# Mean and standard deviation of relative abundance per phylum across
# samples, most abundant phylum first, rendered as a table.
phylum_summary %>%
  group_by(phylum) %>%
  summarise(
    mean = mean(relabun, na.rm = TRUE),
    sd = sd(relabun, na.rm = TRUE)
  ) %>%
  arrange(desc(mean)) %>%
  tt()
tinytable_6q12fciiexfztvjib856
phylum mean sd
p__Bacteroidota 0.387533390 0.199408886
p__Bacillota_A 0.247736092 0.157285090
p__Bacillota 0.119551625 0.147933278
p__Pseudomonadota 0.094124112 0.158574923
p__Campylobacterota 0.054878320 0.094197988
p__Verrucomicrobiota 0.027807574 0.066974042
p__Desulfobacterota 0.023737705 0.036771812
p__Chlamydiota 0.010961883 0.060750276
p__Fusobacteriota 0.010557257 0.028638395
p__Cyanobacteriota 0.009276509 0.016635288
p__Bacillota_C 0.004811016 0.006724154
p__Spirochaetota 0.004098862 0.012506256
p__Bacillota_B 0.002556218 0.004994779
p__Actinomycetota 0.001281229 0.006458780
p__Elusimicrobiota 0.001088209 0.006278019
# Phylum names ordered by decreasing mean relative abundance; used to order
# the phyla in the plot.
phylum_arrange <- phylum_summary %>%
  group_by(phylum) %>%
  summarise(mean = mean(relabun)) %>%
  arrange(desc(mean)) %>%
  pull(phylum)

# Jitter plot of per-sample relative abundance for every phylum.
phylum_summary %>%
  filter(phylum %in% phylum_arrange) %>%
  # reversed levels place the first entry of phylum_arrange at the top
  mutate(phylum = factor(phylum, levels = rev(phylum_arrange))) %>%
  ggplot(aes(x = relabun, y = phylum, group = phylum, color = phylum)) +
  geom_jitter(alpha = 0.5) +
  scale_color_manual(values = phylum_colors[rev(phylum_arrange)]) +
  labs(x = "Relative abundance", y = "Phylum") +
  theme_minimal() +
  theme(legend.position = "none")

5.2 Taxonomy boxplot

5.2.1 Family

# Relative abundance of each family in each sample (one row per
# sample x family).
family_summary <- genome_counts_filt %>%
  # TSS normalisation: divide every non-genome column by its own total
  mutate(across(-genome, ~ .x / sum(.x))) %>%
  # reduce to the minimum number of columns (genome, sample, count)
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  # append sample metadata
  left_join(sample_metadata, by = join_by(sample == Tube_code)) %>%
  # append genome metadata
  left_join(genome_metadata, by = join_by(genome)) %>%
  group_by(sample, family) %>%
  # .groups = "drop" returns an ungrouped table, so later steps do not
  # silently inherit the per-sample grouping
  summarise(relabun = sum(count), .groups = "drop")

# Mean and standard deviation of relative abundance per family across
# samples, most abundant family first, rendered as a table.
family_summary %>%
  group_by(family) %>%
  summarise(
    mean = mean(relabun, na.rm = TRUE),
    sd = sd(relabun, na.rm = TRUE)
  ) %>%
  arrange(desc(mean)) %>%
  tt()
tinytable_0md144trj5fx0vu4t4zp
family mean sd
f__Bacteroidaceae 2.260146e-01 0.1384706235
f__Lachnospiraceae 1.410833e-01 0.1062953893
f__Tannerellaceae 1.045659e-01 0.0799894745
f__Helicobacteraceae 5.448546e-02 0.0937279764
f__Mycoplasmoidaceae 3.756572e-02 0.0767893776
f__Erysipelotrichaceae 3.536287e-02 0.0452140267
f__UBA3700 3.456595e-02 0.0565495010
f__Marinifilaceae 2.794365e-02 0.0272474083
f__Rikenellaceae 2.725202e-02 0.0471735191
f__Enterobacteriaceae 2.687327e-02 0.0929254305
f__Coprobacillaceae 2.627823e-02 0.0907456387
f__ 2.465083e-02 0.0781013121
f__Desulfovibrionaceae 2.373771e-02 0.0367718116
f__DTU072 2.183007e-02 0.0377159876
f__Ruminococcaceae 1.832093e-02 0.0428115194
f__Rhizobiaceae 1.579679e-02 0.0779688169
f__LL51 1.556592e-02 0.0616955422
f__UBA3830 1.512118e-02 0.0441855701
f__Akkermansiaceae 1.224165e-02 0.0317653885
f__Chlamydiaceae 1.096188e-02 0.0607502761
f__Fusobacteriaceae 1.055726e-02 0.0286383947
f__CAG-239 9.138683e-03 0.0152490206
f__Enterococcaceae 8.437601e-03 0.0473906561
f__Gastranaerophilaceae 7.848357e-03 0.0146292952
f__Oscillospiraceae 6.624721e-03 0.0075288565
f__UBA1997 6.613196e-03 0.0315103296
f__Streptococcaceae 6.600789e-03 0.0348230465
f__UBA1242 4.266475e-03 0.0147768596
f__Brevinemataceae 4.098862e-03 0.0125062564
f__Acutalibacteraceae 3.498766e-03 0.0111374416
f__RUG11792 2.921450e-03 0.0255676374
f__Clostridiaceae 2.855351e-03 0.0174153876
f__UBA660 2.600647e-03 0.0118148140
f__Peptococcaceae 2.556218e-03 0.0049947786
f__Acidaminococcaceae 1.980431e-03 0.0051045211
f__CAG-508 1.874529e-03 0.0065256902
f__MGBC116941 1.783700e-03 0.0077801927
f__Moraxellaceae 1.540093e-03 0.0099192011
f__RUG14156 1.428152e-03 0.0045670616
f__Staphylococcaceae 1.411833e-03 0.0051727635
f__Anaerovoracaceae 1.410739e-03 0.0027876311
f__Elusimicrobiaceae 1.088209e-03 0.0062780187
f__CAG-288 9.840222e-04 0.0061275213
f__Anaerotignaceae 9.320656e-04 0.0041174457
f__CALVMC01 7.793540e-04 0.0044385554
f__Eggerthellaceae 6.643755e-04 0.0021620275
f__Massilibacillaceae 6.322621e-04 0.0016561037
f__Mycobacteriaceae 6.168531e-04 0.0061497354
f__UBA1820 4.705627e-04 0.0013078764
f__CAG-274 4.686117e-04 0.0022415212
f__Arcobacteraceae 3.928587e-04 0.0050156837
f__Burkholderiaceae_C 3.835606e-04 0.0048969735
f__Muribaculaceae 3.508548e-04 0.0009525792
f__UBA932 3.295199e-04 0.0011408058
f__Hepatoplasmataceae 3.099135e-04 0.0039567109
f__Rhodobacteraceae 3.068016e-04 0.0039169801
f__Weeksellaceae 2.873650e-04 0.0032049404
f__Eubacteriaceae 1.707442e-04 0.0006844943
f__Sphingobacteriaceae 1.561202e-04 0.0012685229
f__Devosiaceae 1.544841e-04 0.0015368528
f__Pumilibacteraceae 1.324439e-04 0.0007783049
f__WRAU01 9.956857e-05 0.0012712064
f__Peptostreptococcaceae 2.371535e-05 0.0003027773
# Family names ordered by decreasing mean relative abundance; used to select
# and order the top families in the plot.
family_arrange <- family_summary %>%
  group_by(family) %>%
  # mean() matches the column name and the ranking used for phyla; because
  # family_summary holds one row per sample for every family, this ranks the
  # families the same way a per-family sum would.
  summarise(mean = mean(relabun)) %>%
  arrange(desc(mean)) %>%
  pull(family)

# Jitter plot of per-sample relative abundance for the 20 top-ranked
# families, coloured by phylum and split into one panel per `type`.
family_summary %>%
  # attach the phylum of each family (used for colouring)
  left_join(
    genome_metadata %>% select(family, phylum) %>% unique(),
    by = join_by(family)
  ) %>%
  # attach sample metadata (provides the faceting variable)
  left_join(sample_metadata, by = join_by(sample == Tube_code)) %>%
  # keep the top 20 families and drop zero abundances
  filter(family %in% family_arrange[1:20], relabun > 0) %>%
  # reversed levels place the top-ranked family at the top of the axis
  mutate(family = factor(family, levels = rev(family_arrange[1:20]))) %>%
  ggplot(aes(x = relabun, y = family, group = family, color = phylum)) +
  geom_jitter(alpha = 0.5) +
  # NOTE(review): the 8th palette entry is dropped by position; presumably a
  # phylum absent from the top families - confirm against phylum_colors
  scale_color_manual(values = phylum_colors[-8]) +
  facet_grid(. ~ type) +
  labs(x = "Relative abundance", y = "Family", color = "Phylum") +
  theme_minimal()

5.2.2 Genus

# Relative abundance of each genus in each sample (one row per
# sample x genus), excluding genomes without a genus-level annotation.
genus_summary <- genome_counts_filt %>%
  # TSS normalisation: divide every non-genome column by its own total
  mutate(across(-genome, ~ .x / sum(.x))) %>%
  # reduce to the minimum number of columns (genome, sample, count)
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  # append sample metadata
  left_join(sample_metadata, by = join_by(sample == Tube_code)) %>%
  # append genome metadata
  left_join(genome_metadata, by = join_by(genome)) %>%
  group_by(sample, genus) %>%
  # .groups = "drop" returns an ungrouped table, so later steps do not
  # silently inherit the per-sample grouping
  summarise(relabun = sum(count), .groups = "drop") %>%
  # drop the empty genus label "g__" (rows with a missing genus are dropped
  # by this comparison as well)
  filter(genus != "g__")

# Mean and standard deviation of relative abundance per genus across
# samples, most abundant genus first, rendered as a table.
genus_summary %>%
  group_by(genus) %>%
  summarise(
    mean = mean(relabun, na.rm = TRUE),
    sd = sd(relabun, na.rm = TRUE)
  ) %>%
  arrange(desc(mean)) %>%
  tt()
tinytable_4ibmnq7jmu12p4xz7xiu
genus mean sd
g__Bacteroides 1.374441e-01 0.0923562611
g__Parabacteroides 9.843371e-02 0.0803813676
g__Phocaeicola 7.109518e-02 0.0799972428
g__Helicobacter_J 3.115738e-02 0.0603033642
g__Mycoplasmoides 3.115302e-02 0.0765633496
g__Odoribacter 2.605667e-02 0.0268723184
g__Roseburia 2.425344e-02 0.0567044550
g__NHYM01 2.332808e-02 0.0810376677
g__Alistipes 2.224698e-02 0.0287419149
g__Coprobacillus 2.070109e-02 0.0894282233
g__Agrobacterium 1.579679e-02 0.0779688169
g__Akkermansia 1.224165e-02 0.0317653885
g__Fusobacterium_A 1.046073e-02 0.0286441117
g__Kineothrix 9.107908e-03 0.0416218616
g__Proteus 8.976683e-03 0.0694135711
g__Dielma 8.687357e-03 0.0090713197
g__CAG-95 8.238073e-03 0.0207930753
g__JAAYNV01 7.265789e-03 0.0179169564
g__Desulfovibrio 7.219276e-03 0.0214990147
g__UBA866 7.016767e-03 0.0295145125
g__Enterococcus 6.966137e-03 0.0463712943
g__Lactococcus 6.600789e-03 0.0348230465
g__Ureaplasma 6.412700e-03 0.0139267552
g__Parabacteroides_B 6.132159e-03 0.0101543965
g__Lacrimispora 6.028411e-03 0.0098068179
g__CALXRO01 5.977964e-03 0.0313982647
g__Citrobacter 5.896711e-03 0.0340533686
g__NSJ-61 5.745781e-03 0.0202234473
g__Breznakia 5.530147e-03 0.0240721461
g__Clostridium_AQ 5.522246e-03 0.0123487838
g__Bilophila 5.044501e-03 0.0089558435
g__Hungatella_A 4.964136e-03 0.0096921078
g__Escherichia 4.342538e-03 0.0270859242
g__Salmonella 4.319018e-03 0.0148769561
g__UMGS1251 4.312965e-03 0.0073601071
g__MGBC136627 4.305492e-03 0.0164533523
g__Hungatella 4.150386e-03 0.0194068227
g__Clostridium_Q 4.146767e-03 0.0052575243
g__Brevinema 4.098862e-03 0.0125062564
g__Thomasclavelia 4.046233e-03 0.0110779301
g__Scatousia 3.752075e-03 0.0104403539
g__Mailhella 3.745039e-03 0.0104110785
g__Copromonas 3.643508e-03 0.0050495456
g__Enterocloster 3.613702e-03 0.0047492729
g__Ventrimonas 3.566172e-03 0.0071931788
g__Fournierella 3.313097e-03 0.0063192740
g__Limenecus 3.230504e-03 0.0066725343
g__Mucinivorans 3.006847e-03 0.0379999623
g__Lawsonia 2.916613e-03 0.0103686789
g__MGBC133411 2.902785e-03 0.0074333461
g__Caccovivens 2.887473e-03 0.0112659902
g__Sarcina 2.855351e-03 0.0174153876
g__Eisenbergiella 2.796704e-03 0.0069384489
g__Bacteroides_G 2.781473e-03 0.0352463088
g__CAJLXD01 2.730769e-03 0.0088951735
g__Acetatifactor 2.654208e-03 0.0055286194
g__Blautia 2.598789e-03 0.0062369300
g__Velocimicrobium 2.235984e-03 0.0067748392
g__C-19 2.235603e-03 0.0048296119
g__CAZU01 2.189719e-03 0.0066369837
g__Negativibacillus 2.145239e-03 0.0056002700
g__Intestinimonas 2.003816e-03 0.0035552824
g__Rikenella 1.998193e-03 0.0037323264
g__Phascolarctobacterium 1.980431e-03 0.0051045211
g__Butyricimonas 1.886974e-03 0.0042483569
g__RGIG6463 1.855727e-03 0.0040258495
g__MGBC116941 1.783700e-03 0.0077801927
g__JALFVM01 1.712574e-03 0.0038669765
g__Oscillibacter 1.546231e-03 0.0025273862
g__Acinetobacter 1.540093e-03 0.0099192011
g__Pseudoflavonifractor 1.489286e-03 0.0027026675
g__Citrobacter_A 1.444197e-03 0.0061395639
g__Staphylococcus 1.411833e-03 0.0051727635
g__14-2 1.228249e-03 0.0098038984
g__RGIG4733 1.225656e-03 0.0038271024
g__Beduini 1.217163e-03 0.0025500013
g__Scatocola 1.162463e-03 0.0045748144
g__Enterococcus_A 1.123893e-03 0.0100947603
g__UBA1436 1.088209e-03 0.0062780187
g__Faecisoma 1.057958e-03 0.0056043491
g__RGIG9287 9.993630e-04 0.0094920364
g__CAG-345 9.840222e-04 0.0061275213
g__Lachnotalea 9.593025e-04 0.0033990676
g__Blautia_A 9.546974e-04 0.0029560949
g__Ruthenibacterium 8.602962e-04 0.0024818365
g__CAG-269 8.276280e-04 0.0048017072
g__Marseille-P3106 8.230475e-04 0.0017874580
g__WRHT01 6.666234e-04 0.0027445999
g__Eggerthella 6.643755e-04 0.0021620275
g__CHH4-2 6.371240e-04 0.0020328940
g__Corynebacterium 6.168531e-04 0.0061497354
g__Serratia_A 6.076344e-04 0.0077577570
g__Anaerotruncus 6.058602e-04 0.0016447558
g__RUG14156 5.735678e-04 0.0021869659
g__RGIG1896 5.683407e-04 0.0051791669
g__IOR16 5.574841e-04 0.0016418264
g__Faecimonas 5.146607e-04 0.0054508437
g__CAG-56 5.096368e-04 0.0016613952
g__MGBC140009 4.851579e-04 0.0024491799
g__CALURL01 4.805911e-04 0.0017020401
g__Merdimorpha 4.705627e-04 0.0013078764
g__RGIG8482 4.560993e-04 0.0030287706
g__Enterobacter 4.223379e-04 0.0042068345
g__Klebsiella 4.203682e-04 0.0049802041
g__Caccenecus 4.086273e-04 0.0018112589
g__Aliarcobacter 3.928587e-04 0.0050156837
g__Scatenecus 3.851876e-04 0.0018282510
g__Alcaligenes 3.835606e-04 0.0048969735
g__Plesiomonas 3.766988e-04 0.0027593254
g__JAHHSE01 3.529590e-04 0.0014998851
g__HGM05232 3.508548e-04 0.0009525792
g__Enterococcus_B 3.475714e-04 0.0022665993
g__Egerieousia 3.295199e-04 0.0011408058
g__Stoquefichus 3.137462e-04 0.0020871798
g__Hepatoplasma 3.099135e-04 0.0039567109
g__Paracoccus 3.068016e-04 0.0039169801
g__Moheibacter 2.873650e-04 0.0032049404
g__Scatomorpha 2.738230e-04 0.0010358302
g__Emergencia 2.601331e-04 0.0013298673
g__UBA7185 2.523935e-04 0.0014817660
g__Eubacterium 1.707442e-04 0.0006844943
g__Sphingobacterium 1.561202e-04 0.0012685229
g__Devosia 1.544841e-04 0.0015368528
g__Anaerosporobacter 1.507638e-04 0.0012978048
g__Caccomorpha 1.434035e-04 0.0010730603
g__UBA2658 1.355578e-04 0.0007332702
g__Protoclostridium 1.324439e-04 0.0007783049
g__Angelakisella 1.315171e-04 0.0009387427
g__Cetobacterium_A 9.652924e-05 0.0008876688
g__Rahnella 6.708891e-05 0.0008565338
g__Peptostreptococcus 2.371535e-05 0.0003027773
# Genus names (without the "g__" prefix) ordered by decreasing mean relative
# abundance; used to select and order the top genera in the plot.
genus_arrange <- genus_summary %>%
  # guard against the empty genus label
  filter(genus != "g__") %>%
  group_by(genus) %>%
  # mean() matches the column name and the ranking used for phyla; because
  # genus_summary holds one row per sample for every genus, this ranks the
  # genera the same way a per-genus sum would.
  summarise(mean = mean(relabun)) %>%
  arrange(desc(mean)) %>%
  mutate(genus = sub("^g__", "", genus)) %>%
  pull(genus)

# Jitter plot of per-sample relative abundance for the 20 top-ranked genera,
# coloured by phylum and split into one panel per `type`.
genus_summary %>%
  # attach the phylum of each genus (used for colouring)
  left_join(
    genome_metadata %>% select(genus, phylum) %>% unique(),
    by = join_by(genus)
  ) %>%
  # attach sample metadata (provides the faceting variable)
  left_join(sample_metadata, by = join_by(sample == Tube_code)) %>%
  # strip the "g__" prefix so names match genus_arrange
  mutate(genus = sub("^g__", "", genus)) %>%
  # head() keeps at most 20 genera and never pads with NA when fewer exist
  filter(genus %in% head(genus_arrange, 20)) %>%
  # reversed levels place the top-ranked genus at the top of the axis
  mutate(genus = factor(genus, levels = rev(head(genus_arrange, 20)))) %>%
  filter(relabun > 0) %>%
  ggplot(aes(x = relabun, y = genus, group = genus, color = phylum)) +
  # NOTE(review): palette entries 3, 4, 6 and 8 are dropped by position;
  # presumably phyla absent from the top genera - confirm against phylum_colors
  scale_color_manual(values = phylum_colors[-c(3, 4, 6, 8)]) +
  geom_jitter(alpha = 0.5) +
  facet_grid(. ~ type) +
  theme_minimal() +
  # the y axis shows genera (it was previously labelled "Family")
  labs(y = "Genus", x = "Relative abundance", color = "Phylum")